from random import betavariate
import requests
from bs4 import BeautifulSoup
import json
import re



class Hcity(object):
    """Scrape the city index of jobs.51job.com and build per-city, per-page
    URL records.

    Typical use: ``find_data()`` to populate ``self.result`` /
    ``self.number``, then ``write_json()`` to persist them.
    """

    def headers(self):
        """Fetch the city listing page and parse it into a BeautifulSoup tree.

        Side effects: sets ``self.header``, ``self.url``, ``self.html`` and
        ``self.soup``. Raises ``requests.HTTPError`` on a non-2xx response.
        """
        self.header = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'}
        self.url = 'https://jobs.51job.com/'
        # BUG FIX: timeout=2000 waited up to 2000 *seconds* (likely a ms/s
        # mix-up); 20 s is a sane cap for a single page fetch.
        self.html = requests.get(self.url, headers=self.header, timeout=20)
        # Fail loudly on HTTP errors instead of silently parsing an error page.
        self.html.raise_for_status()
        # The page is GBK-encoded; telling requests the encoding makes .text
        # decode correctly. This replaces the fragile
        # .encode('iso-8859-1').decode('gbk') round-trip, which raises
        # UnicodeEncodeError whenever requests guesses any other charset.
        self.html.encoding = 'gbk'
        self.soup = BeautifulSoup(self.html.text, 'html.parser')

    def find_data(self):
        """Collect paged URLs (pages 1-5) for every city link on the page.

        Side effects: sets ``self.number`` (total record count) and
        ``self.result`` (list of one-entry dicts mapping
        ``'<city>之p<n>'`` to the paged URL). Both are set to empty values
        when the expected page structure is missing.
        """
        self.headers()

        self.number = 0
        self.result = []

        # Guard each find(): the original dereferenced them unchecked and
        # crashed with AttributeError whenever the page layout changed.
        container = self.soup.find('div', attrs={'class': 'e e4'})
        link_box = container.find('div', class_='lkst') if container else None
        if link_box is None:
            return
        links = link_box.findAll('a')

        print(len(links))
        # Plain local loop variables: the originals were stored as self.list
        # (shadowing the builtin) and self.x, although they are not state.
        # The original's `if self.text2 is None` check inside the loop was
        # dead code (a list being iterated is never None) and was removed.
        for link in links:
            city = link.get_text().strip()
            href = link.get('href') or ''  # href may be absent; default to ''
            for page in range(1, 6):  # cap at 5 pages per city to keep the data volume manageable
                self.result.append({city + '之p' + str(page): href + 'p' + str(page)})
                self.number += 1
        print(self.result)
        print('一共' + str(self.number) + '页数据')

    def write_json(self):
        """Dump ``self.result`` to ``hcity.json`` as pretty-printed UTF-8 JSON."""
        # The `with` block closes the file; the original's explicit close()
        # inside it was redundant.
        with open('hcity.json', 'w', encoding='utf-8') as fp:
            json.dump(self.result, fp, indent=3, ensure_ascii=False)

     


# Script entry point: scrape the city list and print the collected records.
if __name__ == '__main__':
    scraper = Hcity()
    scraper.find_data()
    # scraper.write_json()
